A Simple Backtest





Kerry Back

Linear regression from scikit-learn

from sqlalchemy import create_engine
import pymssql
import pandas as pd
from scikit-learn.linear_model import LinearRegression

model = LinearRegression()

Connect to the GHZ database

server = "mssql-82792-0.cloudclusters.net:16272"
username = "user"
password = "RiceOwls1912" 
database = "ghz"
string = "mssql+pymssql://" + username + ":" + password + "@" + server + "/" + database

conn = create_engine(string).connect()

Get data

df = pd.read_sql(
    """
    select date, ticker, acc, agr, beta, bm, ep, gma, idiovol,
    lev, mom12m, mom1m, mve, operprof, roeq, ret
    from data
    order by date, ticker
    """,
    conn
  )
df = df.dropna()
df = df.set_index(["date", "ticker"])

Split into train and test

later = df.index.get_level_values("date")>="2010-01"

train = df[~later]
test = df[later]

Train linear model

predictors = [
  "acc", "agr", "beta", "bm", "ep", "gma", "idiovol",
  "lev", "mom12m", "mom1m", "mve", "operprof", "roeq"
]

Xtrain = train[predictors]
ytrain = train["ret"]

model.fit(Xtrain, ytrain)

Regression coefficients

print("intercept =", model.intercept_)

coefs = pd.Series(model.coef_, index=predictors)
coefs
intercept = 0.04111274134329946
acc        -0.009423
agr        -0.002192
beta       -0.002897
bm          0.002487
ep         -0.002326
gma         0.005286
idiovol     0.041304
lev        -0.000529
mom12m     -0.003266
mom1m      -0.003978
mve        -0.002665
operprof    0.000013
roeq        0.000026
dtype: float64

Predict

Xtest = test[predictors]

ypredict = model.predict(Xtest)
ypredict = pd.Series(ypredict, index=test.index)

Sort

cut = lambda x: pd.qcut(x, 5, labels=range(1, 6))
quintiles = ypredict.groupby("date", group_keys=False).apply(cut)
quintiles.name = "quintile"

Compute returns

test = test.join(quintiles)

rets = test.reset_index().groupby(["date", "quintile"]).ret.mean()
rets = rets.unstack()
print(rets.head())
print(rets.mean())
quintile         1         2         3         4         5
date                                                      
2010-01  -0.027476 -0.027824 -0.011851  0.006101  0.041327
2010-02   0.044193  0.040953  0.036378  0.040231  0.033047
2010-03   0.076036  0.071594  0.081333  0.078276  0.098675
2010-04   0.053192  0.071345  0.071672  0.080754  0.123110
2010-05  -0.088657 -0.075424 -0.070076 -0.079732 -0.089493
quintile
1    0.011785
2    0.012492
3    0.012492
4    0.013040
5    0.014558
dtype: float64